{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 对活动进行聚类\n",
    "\n",
    "数据来源于Kaggle竞赛：Event Recommendation Engine Challenge，根据\n",
    "    events they’ve responded to in the past\n",
    "    user demographic information\n",
    "    what events they’ve seen and clicked on in our app\n",
    "用户对某个活动是否感兴趣\n",
    "\n",
    "竞赛官网：\n",
    "https://www.kaggle.com/c/event-recommendation-engine-challenge/data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 导入工具包"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import scipy.sparse as ss\n",
    "import scipy.io as sio\n",
    "\n",
    "#保存数据\n",
    "import pickle\n",
    "\n",
    "#event的特征需要编码\n",
    "from utils import FeatureEng\n",
    "from sklearn.preprocessing import normalize\n",
    "#相似度/距离\n",
    "import scipy.spatial.distance as ssd\n",
    "\n",
    "from sklearn import metrics"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 读取之前算好的测试集和训练集中出现过的活动\n",
    "详见user_event.ipynb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of events in train & test :13418\n"
     ]
    }
   ],
   "source": [
    "#读取训练集和测试集中出现过的活动列表\n",
    "eventIndex = pickle.load(open(\"PE_eventIndex.pkl\", 'rb'))\n",
    "n_events = len(eventIndex)\n",
    "\n",
    "print(\"number of events in train & test :%d\" % n_events)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 特征编码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "FE = FeatureEng()\n",
    "\n",
    "fin = open(\"events.csv\", 'r')\n",
    "\n",
    "#字段：event_id, user_id,start_time, city, state, zip, country, lat, and lng， 101 columns of words count\n",
    "fin.readline() # skip header\n",
    "\n",
    "#词频特征\n",
    "eventContMatrix = ss.dok_matrix((n_events, 101))\n",
    "\n",
    "for line in fin.readlines():       #fin.readlines()返回一个列表，这是可以list(fin)或者直接迭代文件\n",
    "    cols = line.strip().split(\",\")\n",
    "    eventId = str(cols[0])     #这里转换成字符串不是很明白\n",
    "    \n",
    "    #if eventIndex.has_key(eventId):  #在训练集或测试集中出现\n",
    "    if eventId in eventIndex:\n",
    "        i = eventIndex[eventId] \n",
    "        #词频\n",
    "        for j in range(9, 110):\n",
    "            eventContMatrix[i, j-9] = cols[j]\n",
    "fin.close()\n",
    "\n",
    "#词频，可以考虑我们用这部分特征进行聚类，得到活动的genre（法语词汇genre ：种类、类型）\n",
    "eventContMatrix = normalize(eventContMatrix,\n",
    "    norm=\"l2\", axis=0, copy=False)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 聚类函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.cluster import MiniBatchKMeans\n",
    "\n",
    "# 一个参数点（聚类数据为K）的模型，并评价聚类算法性能\n",
    "def K_cluster_analysis(K, df):\n",
    "    print(\"K-means begin with clusters: {}\".format(K));\n",
    "    \n",
    "    #K-means,在训练集上训练\n",
    "    km = MiniBatchKMeans(n_clusters = K)\n",
    "    km.fit(df)\n",
    "    \n",
    "    #保存预测结果\n",
    "    cluster_result = km.predict(df)\n",
    "\n",
    "    # K值的评估标准\n",
    "    #常见的方法有轮廓系数Silhouette Coefficient和Calinski-Harabasz Index\n",
    "    #这两个分数值越大则聚类效果越好\n",
    "    #CH_score = metrics.calinski_harabaz_score(X_train,mb_kmeans.predict(X_train))\n",
    "    CH_score = metrics.silhouette_score(df,cluster_result)   \n",
    "    print(\"CH_score: {}\".format(CH_score))\n",
    "\n",
    "    return CH_score"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 超参数调优"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "K-means begin with clusters: 10\n",
      "CH_score: 0.04994473092489896\n",
      "K-means begin with clusters: 20\n",
      "CH_score: -0.16000745426833665\n",
      "K-means begin with clusters: 30\n",
      "CH_score: -0.10699171862182187\n",
      "K-means begin with clusters: 40\n",
      "CH_score: -0.08037379017439172\n",
      "K-means begin with clusters: 50\n",
      "CH_score: -0.10552577654365897\n",
      "K-means begin with clusters: 60\n",
      "CH_score: -0.08315418954730339\n",
      "K-means begin with clusters: 70\n",
      "CH_score: -0.0679797606768753\n",
      "K-means begin with clusters: 80\n",
      "CH_score: -0.04831297209231342\n",
      "K-means begin with clusters: 90\n",
      "CH_score: -0.05358865163258121\n",
      "K-means begin with clusters: 100\n",
      "CH_score: -0.04951589510728025\n",
      "[0.04994473092489896, -0.16000745426833665, -0.10699171862182187, -0.08037379017439172, -0.10552577654365897, -0.08315418954730339, -0.0679797606768753, -0.04831297209231342, -0.05358865163258121, -0.04951589510728025]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0x16d9e8b39e8>]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAD8CAYAAABzTgP2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3Xu4nOO9//H3NysJkpJYhIQkkrCawxpKs1R2tUoJqWpjF6VaspVN61h1KHJdtWm1Dm1RWueSqBYNJVvrEKFql2LFOQdyQBJCgoQIcvz+/vjO+mXNyqyswxyeWev5vK5rrjk9M/PNZNZ85rnv+7lvc3dEREQadEm6ABERqSwKBhERyaFgEBGRHAoGERHJoWAQEZEcCgYREcmhYBARkRwKBhERyaFgEBGRHF2TLqA9tt56ax80aFDSZYiIdCjTpk171937tLRdhwyGQYMGUV9fn3QZIiIdipm90Zrt1JQkIiI5FAwiIpJDwSAiIjkUDCIikkPBICIiOYoSDGY2xsxeMbM5ZnZOnvs3MbM7svc/ZWaDsrcPMrNPzOz57OnaYtQjIiLtV/BwVTOrAn4HjAYWAs+Y2WR3n9Fos2OBpe6+k5kdAVwCHJ69b66771poHSIiUhzF2GP4AjDH3ee5+yrgdmBsk23GAhOylycB+5qZFeG12+S22+Ba7ZOIiGxUMYJhe2BBo+sLs7fl3cbd1wAfAFtl7xtsZs+Z2WNm9uXmXsTMjjezejOrX7JkSbsKvftuuPzydj1URCQ1ihEM+X75eyu3WQQMdPfdgB8DfzKzLfK9iLtf7+517l7Xp0+LR3TnVVsLc+bAp5+26+EiIqlQjGBYCAxodL0/8FZz25hZV6AX8L67r3T39wDcfRowF/hsEWrKK5OBdetg1qxSvYKISMdXjGB4Bqgxs8Fm1h04ApjcZJvJwLjs5UOBR9zdzaxPtvMaMxsC1ADzilBTXplMnL/8cqleQUSk4yt4VJK7rzGzk4EHgSrgD+4+3cwuBOrdfTJwE3Crmc0B3ifCA2Av4EIzWwOsBX7g7u8XWlNzamqgWzcFg4jIxhRldlV3/zvw9ya3/bTR5U+Bw/I87i7grmLU0BrdusGwYQoGEZGNSd2Rz7W1MH160lWIiFSu1AVDJgOvvw7LlyddiYhIZUplMADMmLHx7URE0iq1waB+BhGR/FIXDIMHw2abKRhERJqTumDo0gVGjFAHtIhIc1IXDBDNSdpjEBHJL7XBsGgRvPde0pWIiFSe1AYDqDlJRCSfVAeDmpNERDaUymDYfnvYYgvtMYiI5JPKYDBTB7SISHNSGQywPhi86ZJCIiIpl+pgeP99ePvtpCsREaksqQ2G2to4V3OSiEiu1AaDhqyKiOSX2mDYZhvo00d7DCIiTaU2GEAjk0RE8kl9MEyfDuvWJV2JiEjlSHUw1NbCRx/B/PlJVyIiUjlSHQzqgBYR2VCqg0FDVkVENpTqYOjdG/r3VzCIiDSW6mAAjUwSEWkq9cFQWwszZ8LatUlXIiJSGVIfDJkMrFwJc+cmXYmISGVQMGjRHhGRHKkPhuHDY30GBYOISEh9MPTsCUOGKBhERBqkPhggOqAVDCIiQcFA9DPMnh2d0CIiaadgIIJhzRp49dWkKxERSZ6CAY1MEhFpTMEADB0KXbsqGEREQMEAQPfuUFOjYBARAQXD/9ewaI+ISNopGLIyGZg3D1asSLoSEZFkKRiyMhlwjwn1RETSrCjBYGZjzOwVM5tjZufkuX8TM7sje/9TZjao0X3nZm9/xcwOKEY97aGRSSIioeBgMLMq4HfA14ARwHfMbESTzY4Flrr7TsDlwCXZx44AjgBqgTHA77PPV3Y77gibbKJ+BhGRYuwxfAGY4+7z3H0VcDswtsk2Y4EJ2cuTgH3NzLK33+7uK939NWBO9vnKrqoqJtTTHoOIpF0xgmF7YEGj6wuzt+Xdxt3XAB8AW7XysQCY2fFmVm9m9UuWLClC2RvSam4iIsUJBstzm7dym9Y8Nm50v97d69y9rk+fPm0ssXUyGVi4EJYtK8nTi4h0CMUIhoXAgEbX+wNvNbeNmXUFegHvt/KxZdPQAa1+BhFJs2IEwzNAjZkNNrPuRGfy5CbbTAbGZS8fCjzi7p69/YjsqKXBQA3wdBFqapfa2jhXMIhImnUt9AncfY2ZnQw8CFQBf3D36WZ2IVDv7pOBm4BbzWwOsadwRPax083sTmAGsAY4yd3XFlpTew0cCJ/5jPoZRCTdLH64dyx1dXVeX19fkuceNQp69IBHHinJ04uIJMbMprl7XUvb6cjnJjQySUTSTsHQRCYDS5bA4sVJVyIikgwFQxPqgBaRtFMwNKE5k0Qk7RQMTfTtC9XVCgYRSS8FQxNm6oAWkXRTMOTREAwdcCSviEjBFAx51NbChx/Cm28mXYmISPkpGPJQB7SIpJmCIY+GIasKBhFJIwVDHlttBf36KRhEJJ0UDM3QyCQRSSsFQzNqa2HGDFi3LulKRETKS8HQjEwGPvkEXnst6UpERMpLwdAMjUwSkbRSMDRjxIg4VzCISNooGJqx+eYwaJCCQUTSR8GwEbW1mn5bRNJHwbARmQzMmgWrVyddiYhI+SgYNiKTiVCYPTvpSkREykfBsBEamSQiaaRg2Ihhw6BLFwWDiKSLgmEjNt0UdtpJHdAiki4KhhZoziQRSRsFQwsyGZgzJ6bHEBFJAwVDCzKZmEhv1qykKxERKQ8FQws0MklE0kbB0IKddoJu3dQBLSLpoWBoQbduMWxVewwikhYKhlbQyCQRSRMFQytkMvDGG7B8edKViIiUnoKhFRo6oGfMSLYOEZFyUDC0Qm1tnKs5SUTSQMHQCoMHw2abKRhEJB0UDK3QpUvsNSgYRCQNFAytpJFJIpIWCoZWymTg7bfhvfeSrkREpLQUDK3U0AGtI6BFpLMrKBjMrNrMppjZ7Oz5ls1sNy67zWwzG9fo9n+Y2Stm9nz2tE0h9ZSS5kwSkbToWuDjzwGmuvvFZnZO9vpPGm9gZtXA+UAd4MA0M5vs7kuzm3zX3esLrKPktt8eevVSMIiUw+rVcNNNcNFFsHYt1NTAZz8bp4bLO+4Im2ySdKWdU6HBMBbYO3t5AvAPmgQDcAAwxd3fBzCzKcAY4M8FvnZZmakDWqTU3OHee+Gcc+CVV+DLX46JLF99FSZPhsWL129rBjvskBsWDec77ABdC/12S4B7/BsXLGj+9K9/wYABpa2j0LduW3dfBODui5ppCtoeWNDo+sLsbQ1uNrO1wF3Az93d872QmR0PHA8wcODAAstun9pamDQp/vPMEilBpNN68kk466z44hs+PILgoINy/9Y++ABmz46gePXV9ZdvvRU+/HD9dt26wZAhuWHRcHm77WIIerm5w7JlzX/hz58PCxfCqlW5j9tkkwiCAQNgn33ieUqtxWAws4eBvnnuGt/K18j3FdrwT/uuu79pZpsTwXAUMDHfk7j79cD1AHV1dWV4azaUycD118fopH79kqhApPOZPRvOPRfuugv69o2/sWOOyf+Lv1cvqKuLU2PusGRJblg0XJ4yBT79dP22PXrEXkjTpqmaGth66/b/6Pvoo43/0l+wAFasyH1MVVU0Uw8YAF/4AhxyyPoQaDj16VP+H6ItBoO779fcfWb2jpn1y+4t9AMW59lsIeubmwD6E01OuPub2fPlZvYn4As0EwyVoHEHtIJBpDCLF8PPfgbXXhu/ii+4AM44A3r2bPtzmcE228TpS1/KvW/duvgl3hAYDecvvAD33ANr1qzftnfv/E1TgwbB0qUb/9JftmzDmvr2jS/32loYM2bDL/2+fSMcKk2hTUmTgXHAxdnze/Ns8yDwi0YjlvYHzjWzrkBvd3/XzLoBBwEPF1hPSTUOhtGjk61FpKP6+GO4/HK45JK4fPzxcP75sO22pXm9Ll1g4MA47btv7n2rV8Prr2+4l/H443DbbRt/3q22ii/3QYOiL2TgwNwv/e22g+7dS/NvKrVCg+Fi4E4zOxaYDxwGYGZ1wA/c/Th3f9/MfgY8k33MhdnbegIPZkOhigiFGwqsp6T69IlfJOqAFmm7tWvhllvgpz+Ft96Cgw+Giy+GoUOTq6lbt9grqKmBAw/Mve+TT2Du3AiL11+H6ur1X/r9+0eTVGdlzfT1VrS6ujqvr09mhOtXvxq/cv7970ReXqTDcYf774ezz44DREeNgssu27DJR0rPzKa5e11L2+nI5zbKZOLDvW5d0pWIVL76+mi++frXYeXKGNX3xBMKhUqnYGijTCZGH8yfn3QlIpXrtdfgyCNh992j6fXqq2Ohq0MO0VDvjkDB0EaaGkOkee+9Bz/+MQwbFiN+xo+HOXPgpJOiPV86BgVDG2k1N5ENffpp9BvsuCNceSUcdVSM7vn5z2GLLZKuTtpKwdBGvXrFiATNsioSfW233hoji84+G/bcE55/Hm68MQ7cko5JwdAOmjNJJI4oHjkSjj46jhieOhX+9jfYeeekK5NCKRjaIZOBmTNzj5gUSYsXXoijePffP472/dOf4JlnYii3dA4KhnbIZGLo3dy5SVciUj4LFsB//Rfsths8/TT8+tcwaxZ85zvJTEonpaP/znbQyCRJk2XLYhrsmhq4/XY488z4UfTjH2s9hM5KwdAOw4fHWGx1QEtntnIlXHFFjDS65BL49rdjjYRLL4Ut867VKJ2FgqEdevSIud61xyCd0cqVMdJo+HA4/fRoOnr2WZg4MRbAkc6vA65xVBk0Mkk6m4UL4brrYj2ExYthl13ggQeik1lHK6eL9hjaKZOJWRdXrky6EpH2c4d//hMOOyymj77oIthjD3jwQXjuOTjgAIVCGmmPoZ0ymZhG+JVX4peVSEeyYkWsN3D11fDSS9FncPrpcOKJMHhw0tVJ0hQM7dQwNcb06QoG6TjmzIHf/x7+8IdYP/lzn4ujlL/znc69voC0jYKhnYYOjTVp1c8glW7dumgauvrqWBehqgoOPRROPhm++EU1FcmGFAzt1L17rAWrYJBKtWwZ3Hwz/O53cdxB376xetoJJ2jNctk4BUMBMplYiESkkrz0Uuwd/PGPsdrgnnvGLKff+lbHXYNYykvBUIBMBu68MzryevZMuhpJs9Wr4d57IxAeeww23RS++91YB2G33ZKuTjoaBUMBGjqgZ86EuhZXURUpvnfegRtugGuvhTffjCGnl14K3/8+bLVV0tVJR6VgKEDjOZMUDFIu7jGJ3VVXxR7r6tVxENo118CBB0bnskghFAwF2HHHmERMHdBSDp9+CnfcEc1F9fWw+ebwgx9Ec9HQoUlXJ52JjnwuQFUVjBihYCjElCkwenQMo5T85s+H886DAQNi2usVK2Kk0Ztvwm9/q1CQ4lMwFEhzJrXPmjWxUPwBB0Rn6YEHxrj6jz9OurLK4A6PPBIjiQYPjtlNv/zlWCVt+vQ4QnnzzZOuUjorBUOBamvjl9uyZUlX0nEsWAD77AO/+EV0kr79NvzoR/EreORImDYt6QqTs2JFHJlcWwv77hvzGJ19NsybB3ffHauk6YA0KTUFQ4EaOqC1NkPr/O//wq67xoLxt90W0zFUV8Pll0ez0vLlMGpUhMbatUlXW1733APDhkWfQY8ecMstMePpL3+p6a6lvBQMBdJqbq2zalWs+PXNb8LAgTG//5FH5m6z337w4ovRfDJ+PHzlK/Daa8nUW04LFsDBB8N//mdMZvfYY7GG8rhxcTyCSLkpGAo0cCB85jMKho2ZNy+Ovr388uhHePLJWCYyn+rqWD7y1lvjCN5ddolfzu5lLbks1qyB3/wmFsR56KE4/mDaNNhrLzUXSbIUDAUyi70GNSXl95e/xJG3s2fDXXfF2PuWfgWbwfe+F3sPI0fCMcfEpG/vvluemsvhmWdg993hjDNg771hxgw46yzo1i3pykQUDEVRW6s9hqY++QR++MNYJ3j48OhT+Na32vYcO+wQo3AuvTT6JnbeOWYJ7cg++ABOOSUWw1m8GCZNin/boEFJVyaynoKhCDIZWLIk/tAFZs2KDuRrr41fwY8/3v4vvqqqeI6nn44pHsaMiS/Wjjas1T1CYPjwGH118skxlcohh6jZSCqPgqEI1AG93sSJMT3IW2/B3/8ev/aL0Tyy665xtO+PfhRH/o4cGR3YHcHrr8NBB8XymX37wlNPxYFpW2yRdGUi+SkYikDBAB99FEfljhsXX9rPPw9f+1pxX2PTTXOHte6xRwzlrNRhratXRzCOGBEjjX7zm9jz2X33pCsT2TgFQxFsu200c6S1A/rFF+PLbuLEWAhm6lTYfvvSvV7jYa3nnRedt5U2rPXJJyMgf/KTOLp75sxYU7mrZieTDkDBUARm6eyAdofrrotf7suWwcMPwwUXlOfLr/Gw1hdfjLWLJ0xIfljr0qUxsd2ee8ble+6Bv/415jkS6SgUDEXSMGdS0l9M5fLBB3DEEfEluNde8MILMV1DOTUe1rrbbtGUddhh8N575a0D4v/9z3+OzuUbboi+kBkzYOzY8tciUigFQ5FkMvDhhzGFQWdXXw+f/3wcl/DLX8bMqNtsk1w9O+wQE85dcglMnlz+Ya1z58ZoqSOPjD2DZ56J/gRNcicdlYKhSNLQAe0OV1wBX/xidKw+9hiccw50qYBPUVVVTDb39NPRzDRmDJx6ahxPUSqrVsWcTplM9ClcdRX8+98RmiIdWUF/0mZWbWZTzGx29nzLZrZ7wMyWmdl9TW4fbGZPZR9/h5l12KXKG5b57Kwd0O+9F/P5nH56jDZ6/vloR680u+4av9hPOy2+qEs1rPXxx6P5avz4GIo6c2Ycm6DV06QzKPS33jnAVHevAaZmr+dzGXBUntsvAS7PPn4pcGyB9SSmuhr69eucewz/+ld84d5/f+wx3HNP/Hsr1WabRZ0PPRR9IaNGFW9Y6/vvw3HHRb/KihVw330x7UcpR2GJlFuhwTAWmJC9PAE4ON9G7j4VWN74NjMz4KvApJYe31F0tkV71q2LL9SvfAW6d4cnnohf4h3lSN3Ro2MivoMPXj+s9fXX2/dc7jECatiwmNTv7LNj7/DrXy9iwSIVotBg2NbdFwFkz9vSBbkVsMzd12SvLwSa/d1lZsebWb2Z1S9ZsqTdBZdSJhMjUSr1gKu2eOedaDI677yYwO7ZZ+OI5o6mujrWSZ44MUYv7bJL24e1vvpqHDtx9NGxzvezz0ZHd8+epatbJEktBoOZPWxmL+c5FToQL9/vzmb/XN39enevc/e6Pn36FPjSpZHJRGdnpR1s1VZTp0bT0T//CddfH8Mwe/VKuqr2M4Ojjsod1vrtb7c8rHXlyjguY+edYzrsa66JZrVddilL2SKJaTEY3H0/d8/kOd0LvGNm/QCy522ZRu5doLeZNRwO1R94q63/gErS0VdzW7MmjlwePRp6944RPv/93x2n6aglDcNaL74Y7r03vvAfeij/tv/4Rxw09z//E0dYz5oVx2xUwggskVIr9GM+GRiXvTwOuLe1D3R3Bx4FDm3P4yvRiBFx3hH7Gd58M9YY/tnP4hd1fX18cXY2VVUxTcVTT8VqaQccEP0mDcNa3303/v377BNDch94IPaY+vZNtGyR8nL3dp+IfoKpwOzseXX29jrgxkbbPQ4sAT4h+hIOyN4+BHgamAP8BdikNa87cuRIr1SDBrkfcUTSVbTN3/7mvtVW7j17uk+cmHQ15fPxx+6nneYO7iNGuF96qXt1tXvXru7nnRf3i3QmQL234jvWvAPO4VBXV+f19fVJl5HXN74RI19eeinpSlq2alWMw//Vr6Ld/M47YejQpKsqvylTYi/hrbfgS1+KdSQajksR6UzMbJq7tziMRHM9FlkmE9MxrF5d2cs0fvxxjLR58kk48UT49a/Tu/B8w7DW+vp4T9SPIGmnP4Eiy2QiFGbPTrqSjTv33AiF226LFcXSGgoNqqth//0VCiKgYCi6hiaISu6AfvTRWEHslFNi4jcRkcYUDEU2bFj86qzUYFi+HL7/fdhppziqWUSkKfUxFNmmm0JNTeUGw5lnwvz5MQmcjtwVkXy0x1AClTpn0gMPxJHMZ5wRU2eLiOSjYCiBTCYWbynlWgBttXRpzAo6YgRceGHS1YhIJVMwlEBtbcxMOmtW0pWsd9pp8PbbMZlc2kcgicjGKRhKoNJWc7vnnpgyevz4WLhGRGRjFAwlsNNOsX5BJQTDu+/CCSfEbKnjxyddjYh0BBqVVALdusWw1aSDwR1++MPoX3j44QgrEZGWaI+hRDKZ5KffvuMOmDRp/ZoCIiKtoWAokdpaeOMN+PDDZF7/7bfhpJNgjz3grLOSqUFEOiYFQ4k0dEDPmFH+13aPBXY+/jiWseyqBkMRaQMFQ4kkOTJpwgS4776Y8iKN02iLSGEUDCUyaBD06FH+YFiwII5Z2GsvOPXU8r62iHQOCoYS6dIl+hnK2QHtDsceC2vXws03awppEWkffXWUUG1tefcYrrsuViP71a9gyJDyva6IdC4KhhLKZGJ00Lvvlv615s2LmVNHj44D2kRE2kvBUEINHdClbk5aty7WLK6qgptuArPSvp6IdG4KhhIqVzBceWWsr3DllTBgQGlfS0Q6PwVDCW23HfTqVdp+hldegfPOg298A8aNK93riEh6KBhKyKy0i/asWRNh0KNHLMCjJiQRKQYFQ4k1BIN78Z/7ssvgqafg97+Hvn2L//wikk4KhhLLZGJ200WLivu8L70E558Phx0Ghx9e3OcWkXRTMJRYKTqgV62Co4+GLbeMvQURkWJSMJRYbW2cF7Of4aKL4Pnno19h662L97wiIqBgKLk+fWCbbYoXDPX1EQxHHw1jxxbnOUVEGlMwlEGxRiZ9+mmMQurbN45ZEBEpBQVDGTSs5rZuXWHP89OfxvoON94IvXsXpzYRkaYUDGWQycCKFTB/fvuf44knYnK844+HMWOKV5uISFMKhjIotAN6xYpoQtphhwgHEZFSUjCUQaHBcO65MGdOrLGw+ebFq0tEJB8FQxn06hWT27UnGB59FK66KlZj23vvopcmIrIBBUOZtGdk0ocfwjHHQE1NrN8sIlIOXZMuIC0yGXjkkZj4rmsr3/Uzz4w1nP/v/2KiPBGRctAeQ5nU1sLKlTB3buu2v/9+uOEGOOss+I//KG1tIiKNKRjKpGHOpNY0Jy1dCscdF2FywQWlrUtEpKmCgsHMqs1sipnNzp5v2cx2D5jZMjO7r8ntt5jZa2b2fPa0ayH1VLLhw2O9hNYEw6mnwuLFMGECbLJJ6WsTEWms0D2Gc4Cp7l4DTM1ez+cy4Khm7jvL3XfNnp4vsJ6K1aMH7Lhjy8Hw17/CH/8I48fDyJHlqU1EpLFCg2EsMCF7eQJwcL6N3H0qsLzA1+rwGqbGaM6SJXDCCbDbbhEMIiJJKDQYtnX3RQDZ823a8RwXmdmLZna5mTXbcGJmx5tZvZnVL1mypL31Jqq2Fl59NTqhm3KHE0+EDz6AiROhW7fy1yciAq0IBjN72MxeznMqxqTP5wLDgN2BauAnzW3o7te7e5271/Xp06cIL11+mQysXQuvvLLhfXfcAZMmwYUXru+oFhFJQosj6t19v+buM7N3zKyfuy8ys37A4ra8eMPeBrDSzG4GzmzL4zuaxiOTdtll/e2LFsXewqhRceyCiEiSCm1KmgyMy14eB9zblgdnwwQzM6J/oojrnFWez342Dm5r3AHtHjOmfvIJ3HILVFUlVp6ICFB4MFwMjDaz2cDo7HXMrM7MbmzYyMweB/4C7GtmC83sgOxdt5nZS8BLwNbAzwusp6J17w5Dh+Z2QN9yC9x3H1x8cdwnIpI0c/eka2izuro6r6+vT7qMdjn88Fiec+7cWJ9h551jFNIjj0AXHW4oIiVkZtPcva6l7fRVVGaZDMybBx99BMceG53RN9+sUBCRyqFJ9MqsoQP61FPh4Yfh2mth8OBkaxIRaUy/U8usIRhuvhn23z86nkVEKon2GMpsyBDYdNOYA+mmm2L+JBGRSqJgKLOqKrj00hi62r9/0tWIiGxIwZCAU05JugIRkeapj0FERHIoGEREJIeCQUREcigYREQkh4JBRERyKBhERCSHgkFERHIoGEREJEeHnHbbzJYAbyRdR4G2Bt5NuogKofcil96PXHo/1iv0vdjB3VtcG7lDBkNnYGb1rZkXPQ30XuTS+5FL78d65Xov1JQkIiI5FAwiIpJDwZCc65MuoILovcil9yOX3o/1yvJeqI9BRERyaI9BRERyKBhKzMwGmNmjZjbTzKab2WnZ26vNbIqZzc6eb5l0reVkZlVm9pyZ3Ze9PtjMnsq+H3eYWfekaywXM+ttZpPMbFb2c/Ifaf18mNnp2b+Tl83sz2a2aZo+G2b2BzNbbGYvN7ot72fBwm/NbI6ZvWhmny9WHQqG0lsDnOHuw4FRwElmNgI4B5jq7jXA1Oz1NDkNmNno+iXA5dn3YylwbCJVJeNK4AF3HwZ8jnhfUvf5MLPtgVOBOnfPAFXAEaTrs3ELMKbJbc19Fr4G1GRPxwPXFKsIBUOJufsid382e3k58Ue/PTAWmJDdbAJwcDIVlp+Z9Qe+DtyYvW7AV4FJ2U1S836Y2RbAXsBNAO6+yt2Xkd7PR1dgMzPrCvQAFpGiz4a7/xN4v8nNzX0WxgITPfwb6G1m/YpRh4KhjMxsELAb8BSwrbsvgggPYJvkKiu7K4CzgXXZ61sBy9x9Tfb6QiI802AIsAS4Odu0dqOZ9SSFnw93fxP4FTCfCIQPgGmk97PRoLnPwvbAgkbbFe29UTCUiZl9BrgL+JG7f5h0PUkxs4OAxe4+rfHNeTZNy3C5rsDngWvcfTdgBSloNson23Y+FhgMbAf0JJpLmkrLZ6MlJfu7UTCUgZl1I0LhNne/O3vzOw27fdnzxUnVV2Z7At80s9eB24lmgiuI3eCu2W36A28lU17ZLQQWuvtT2euTiKBI4+djP+A1d1/i7quBu4Evkt7PRoPmPgsLgQGNtivae6NgKLFs+/lNwEx3/02juyYD47KXxwH3lru2JLj7ue7e390HER2Lj7j7d4FHgUOzm6Xp/XgbWGBmQ7M37QvMIJ2fj/nAKDPrkf27aXhdAr+GAAAAyElEQVQvUvnZaKS5z8Jk4Ojs6KRRwAcNTU6F0gFuJWZmXwIeB15ifZv6eUQ/w53AQOIP4jB3b9rp1KmZ2d7Ame5+kJkNIfYgqoHngO+5+8ok6ysXM9uV6IjvDswDjiF+tKXu82FmFwCHE6P5ngOOI9rNU/HZMLM/A3sTs6i+A5wP3EOez0I2PK8mRjF9DBzj7vVFqUPBICIijakpSUREcigYREQkh4JBRERyKBhERCSHgkFERHIoGEREJIeCQUREcigYREQkx/8DV6yx7RM0L24AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x16d9a329908>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 设置超参数（聚类数目K）搜索范围\n",
    "CH_scores = []\n",
    "Ks = [10,20,30,40,50,60,70,80,90,100]\n",
    "for K in Ks:\n",
    "    ch = K_cluster_analysis(K, eventContMatrix)\n",
    "    CH_scores.append(ch)\n",
    "    \n",
    "print (CH_scores)\n",
    "\n",
    "# 绘制不同聚类数目的模型的性能，找到最佳模型／参数（分数最高）\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "plt.plot(Ks, np.array(CH_scores), 'b-')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
